# import jieba.analyse
# import json
#
# text = "我想要一台手机，内存大于8G，硬盘小于512G"
# keywords = jieba.analyse.extract_tags(text)
# print(keywords)
# # Initialize the JSON structure
# result = {
#     "type": "手机",
#     "memory": "",
#     "harddrive": ""
# }
#
# # Map the keywords to the JSON structure
# for keyword in keywords:
#     if "内存" in keyword:
#         if "大于" in text:
#             result["memory"] = ">8G"
#         elif "小于" in text:
#             result["memory"] = "<8G"
#         elif "等于" in text:
#             result["memory"] = "=8G"
#     elif "硬盘" in keyword:
#         if "大于" in text:
#             result["harddrive"] = ">512G"
#         elif "小于" in text:
#             result["harddrive"] = "<512G"
#         elif "等于" in text:
#             result["harddrive"] = "=512G"
#
# # Convert the dictionary to a JSON string
# json_result = json.dumps(result, ensure_ascii=False, indent=4)
# print(json_result)

import jieba
import re
import json
import pymysql

# Settings for the MySQL connection shared by the whole module.
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment or a config file instead.
_DB_SETTINGS = {
    "host": "localhost",
    "user": "root",
    "password": "1231",
    "database": "scrapy_demo",
}

# The connection is opened once, when the module is loaded.
db = pymysql.connect(**_DB_SETTINGS)

# Module-level SQL string, initially empty.
main_sql = ""
# Product types recognised among the jieba tokens (extend as needed).
_PRODUCT_TYPES = ("手机", "电脑", "平板")

# Chinese comparison words and the SQL operators they stand for.
# "以上"/"以下" are inclusive bounds ("8G以上" means 8G or more).
_COMPARISON_OPERATORS = {
    "大于": ">",
    "小于": "<",
    "等于": "=",
    "以上": ">=",
    "以下": "<=",
}

# Matches a capacity constraint following a component keyword, in either of
# the two word orders: operator-then-value ("内存大于8G") or
# value-then-operator ("内存要8G以上"). The gap between the keyword and the
# constraint may not cross clause punctuation, so a constraint that belongs
# to a different component is not picked up by mistake.
_CAPACITY_TEMPLATE = (
    r"{keyword}[^，,。；;]*?"
    r"(?:(大于|小于|等于)\s*(\d+)\s*[Gg][Bb]?"
    r"|(\d+)\s*[Gg][Bb]?\s*(以上|以下))"
)

# (result key, keyword in the sentence, column in pc_info) per component.
_CAPACITY_FIELDS = (
    ("memory", "内存", "neicun_rongliang"),
    ("harddrive", "硬盘", "yingpan_rongliang"),
)


def _parse_capacity(sentence, keyword):
    """Return ``(sql_operator, gigabytes)`` for *keyword*, or ``None`` if absent."""
    pattern = _CAPACITY_TEMPLATE.format(keyword=re.escape(keyword))
    match = re.search(pattern, sentence)
    if match is None:
        return None
    prefix_word, prefix_value, suffix_value, suffix_word = match.groups()
    if prefix_word is not None:
        return _COMPARISON_OPERATORS[prefix_word], int(prefix_value)
    return _COMPARISON_OPERATORS[suffix_word], int(suffix_value)


def extract_keywords(sentence):
    """Parse a Chinese product request, query ``pc_info``, and return the criteria.

    The sentence is tokenised with jieba to find the product type, and
    scanned with regular expressions for memory ("内存") and hard-drive
    ("硬盘") capacity constraints. Each constraint is parsed independently,
    so either one may appear without the other. The constraints found are
    appended as filters to a ``select * from pc_info`` query, which is run
    on the module-level ``db`` connection; the fetched rows are printed.

    Args:
        sentence: The request text, e.g. "我想要一台电脑，内存要8G以上".

    Returns:
        A JSON string with the keys ``type``, ``memory`` and ``harddrive``.
        ``type`` is the first recognised product word or ``null``; the other
        two are an SQL operator followed by the number of gigabytes
        (e.g. ``">=8"``) or ``null`` when no constraint was found.
    """
    words = jieba.lcut(sentence)
    print("words", words)

    result = {"type": None, "memory": None, "harddrive": None}
    # The first token that names a known product type wins.
    result["type"] = next((w for w in words if w in _PRODUCT_TYPES), None)

    sql = "select * from pc_info where 1=1"
    params = []
    for key, keyword, column in _CAPACITY_FIELDS:
        constraint = _parse_capacity(sentence, keyword)
        if constraint is None:
            continue
        operator, gigabytes = constraint
        result[key] = f"{operator}{gigabytes}"
        # Column and operator come from fixed tables above; only the numeric
        # value varies, and it is passed as a query parameter.
        sql += f" and {column} {operator} %s"
        params.append(gigabytes)

    # NOTE: the module-level ``main_sql`` is deliberately left untouched; the
    # original code only ever bound a local variable of the same name.
    print("xxxxxxxx----", sql, params)
    with db.cursor() as cursor:
        # Pass None rather than an empty list when there are no placeholders.
        cursor.execute(sql, params or None)
        data = cursor.fetchall()
    print(data)
    return json.dumps(result, ensure_ascii=False)


# Example sentence: "I want a computer, with 8G or more of memory and a hard
# drive of 512G or more".
sentence = "我想要一台电脑，内存要8G以上，硬盘要512G以上"
# Runs whenever the module is loaded: extract_keywords tokenises the sentence,
# queries the database and returns the parsed criteria as a JSON string.
output = extract_keywords(sentence)
# output = jieba.lcut(sentence)
print(output)